Histogram

Regular histogram

  • binwidth sets the width of each bin
  • bins sets the number of bins in the histogram
# Histogram of movie ratings, 0.1 rating units per bin.
#   color -> outline of each bar; fill -> interior of each bar
#   alpha -> fill transparency: 0 means no fill color, 1 means solid fill
ggplot(data = movies, mapping = aes(x = rating)) +
  geom_histogram(
    binwidth = 0.1,
    color = "red",
    fill = "pink",
    alpha = 0.5
  )

Better histogram

Use the per-bin count to color the bars of the histogram.

library(ggplot2movies)
library(dplyr)

# Histogram of movie ratings whose bars are shaded by their own height.
# `count` is not a column of `movies`; it is computed by the histogram stat,
# so it is referenced through after_stat(). The older `..count..` spelling is
# deprecated in current ggplot2 releases.
# No fixed color/fill/alpha is set here: fill comes from the mapping.
movies %>%
  ggplot(aes(x = rating)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 0.1)

Scatter plot

Basic Scatter plot

# Car weight against fuel economy, drawn with large points in a fixed color.
# Colors can be given as hex strings such as "#33CCCC"
# (see https://www.color-hex.com/).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(size = 5, color = "#33CCCC")

Change the size of the points with variables in Scatter plot

Example 1:

# Map point size to horsepower (a continuous variable).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(size = hp))

Example 2:

# cyl is not continuous (it only takes the values 4, 6 and 8), so it is
# wrapped in factor() before being mapped to point size.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(size = factor(cyl)))

Example 3:

# Distinguish cylinder counts by both point shape and color, at a fixed size.
# factor() is needed because cyl is not continuous (values 4, 6 and 8 only).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(
    mapping = aes(shape = factor(cyl), color = factor(cyl)),
    size = 4
  )

Example 4:

# Color points by horsepower. The explicit blue-to-red gradient on the last
# line is not necessary; it only overrides the default continuous color scale.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(color = hp), size = 5) +
  scale_color_gradient(low = "blue", high = "red")

Example 5:

# Sales against volume for txhousing: semi-transparent points colored on a
# blue-to-red gradient by sales, with a green smoothed trend line on top.
ggplot(data = txhousing, mapping = aes(x = sales, y = volume)) +
  geom_point(mapping = aes(color = sales), alpha = 0.6) +
  scale_color_gradient(low = "blue", high = "red") +
  geom_smooth(color = "green")

Bar plot

Example 1:

# Number of cars in each vehicle class, bars outlined and filled in blue.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(color = "blue", fill = "blue")

# Number of cars at each highway-mpg value, bars filled with a hex color.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_bar(fill = "#F59D92")

Example 2:

# Mapping fill to drv adds a second variable to the bar plot: each class bar
# becomes a stack with one segment per drive type.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = drv))

# Stacked bars per manufacturer, segmented by cylinder count; coord_flip()
# swaps the axes so the bars run horizontally.
ggplot(data = mpg, mapping = aes(x = manufacturer)) +
  geom_bar(mapping = aes(fill = factor(cyl))) +
  coord_flip()

Example 3:

# position = "dodge" places the drv bars side by side within each class
# instead of stacking them.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = drv), position = "dodge")

Example 4:

# position = "fill" stretches every bar to the same height, so the segments
# show the share of each drv within a class rather than raw counts.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = drv), position = "fill")

Boxplot

Example 1:

# Distribution of mpg for each cylinder count; cyl is converted to a factor
# so that each value gets its own box.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot()

Example 2:

# Same boxplot with the axes swapped, giving horizontal boxes.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot() +
  coord_flip()

Example 3:

# Boxes filled by cylinder count, drawn on the dark theme.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(mapping = aes(fill = factor(cyl))) +
  theme_dark()

Variable Plotting

Example 1:

# Rectangular 2D binning of year against rating, using the default bins and
# the default fill scale.
ggplot(data = movies, mapping = aes(x = year, y = rating)) +
  geom_bin2d()

Example 2:

# Same 2D binning, with the fill running from green (low) to red (high).
ggplot(data = movies, mapping = aes(x = year, y = rating)) +
  geom_bin2d() +
  scale_fill_gradient(high = "red", low = "green")

Example 3:

# 2D binning with explicit bin sizes: binwidth = c(x, y), here 3 along year
# and 1 along rating, filled on a blue-to-red gradient.
ggplot(data = movies, mapping = aes(x = year, y = rating)) +
  geom_bin2d(binwidth = c(3, 1)) +
  scale_fill_gradient(high = "red", low = "blue")

Example 4:

# Hexagonal instead of rectangular bins, filled on a blue-to-red gradient.
ggplot(data = movies, mapping = aes(x = year, y = rating)) +
  geom_hex() +
  scale_fill_gradient(high = "red", low = "blue")

Example 5:

# 2D density of year against rating, drawn with geom_density2d().
ggplot(data = movies, mapping = aes(x = year, y = rating)) +
  geom_density2d()

Coordinates and Faceting

Example 1:

# Restrict the visible region of the plot. Limits are given as c(low, high)
# for each axis.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  coord_cartesian(xlim = c(1, 4), ylim = c(15, 30))

Example 2:

# Fix the aspect ratio of the plot. ratio is expressed as y / x: with 1/3,
# one unit on the y axis is drawn one third as long as one unit on the x axis.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  coord_fixed(ratio = 1 / 3)

Example 3:

# One panel per cyl value. In facet_grid(rows ~ cols) the variable on the
# right of ~ splits the plot along the x direction (columns); "." means no
# split in that position.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

Example 4:

# One panel per drv value, split along the y direction (rows).
# Note: . ~ var_name splits along the x-axis (columns),
#       var_name ~ . splits along the y-axis (rows).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

Example 5:

# Split in both directions at once: the formula reads (y ~ x), so drv defines
# the rows and cyl defines the columns.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

Themes

Example 1:

  • theme_set(theme_minimal()) sets the theme for all subsequent plots
# Make theme_minimal() the session-wide theme, then draw a scatter plot that
# adds no theme of its own.
theme_set(theme_minimal())
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

Example 2:

# Apply the dark theme to this plot only by adding it as a layer.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  theme_dark()

Example 3:

# ggthemes supplies additional themes; run ?ggthemes to find out more.
library(ggthemes)
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  theme_economist()

Assignment

# Assignment: build up a CPI vs. HDI chart one layer at a time. Every
# expression below prints its own intermediate plot.
df <- read.csv("Economist_Assignment_Data.csv")
# head(df)

# Step 1: scatter of CPI against HDI, colored by Region.
ggplot(data = df, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region))

# Step 2: draw every point with shape "o" at size 3.
ggplot(data = df, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region), shape = "o", size = 3)

# Step 3: add a smoother with all rows placed in a single group (group = 1).
ggplot(data = df, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region), shape = "o", size = 3) +
  geom_smooth(mapping = aes(group = 1))

# Step 4: replace the default smoother with a red linear fit of y on log(x),
# without the confidence band.
ggplot(data = df, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region), shape = "o", size = 3) +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  )

# Step 5: label every point with its Country, colored by Region.
ggplot(data = df, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region), shape = "o", size = 3) +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  ) +
  geom_text(mapping = aes(label = Country, color = Region))

# Countries to annotate on the final chart. Names must match the Country
# column exactly, otherwise the row is silently left unlabeled (the earlier
# misspelling "Spane" never matched, so Spain was not labeled).
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan", "Congo",
  "Greece", "Argentina", "Brazil", "India", "Italy", "China", "South Africa",
  "Spain", "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)

# Final chart: points colored by Region, a red linear fit of y on log(x),
# labels only for the selected countries, named and limited axes, a title and
# the Economist white theme. theme_bw() is no longer added because the
# complete theme_economist_white() that follows replaced it entirely.
df %>%
  ggplot(aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region), shape = "o", size = 3) +
  geom_smooth(
    method = "lm", formula = y ~ log(x),
    se = FALSE, color = "red"
  ) +
  geom_text(
    aes(label = Country),
    color = "gray20",
    # Only the rows whose Country is in pointsToLabel are passed to this layer.
    data = subset(df, Country %in% pointsToLabel),
    check_overlap = TRUE
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(.9, 10.5),
    breaks = 1:10
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0)
  ) +
  ggtitle("Corruption and Human development") +
  theme_economist_white()

Label only a part of data

  1. Create a vector that contains all the labels to be shown on the plot, like pointsToLabel.
  2. In geom_text(), pass data = subset(data_name, variable_name %in% vector_name) together with check_overlap = TRUE.
# Labels to show on the plot. Each entry must match a value of df$Country
# exactly; a misspelled entry (previously "Spane") matches nothing and that
# country is silently left without a label.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan", "Congo",
  "Greece", "Argentina", "Brazil", "India", "Italy", "China", "South Africa",
  "Spain", "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)

# The text layer gets its own data: only rows whose Country appears in
# pointsToLabel. check_overlap = TRUE is passed to geom_text() alongside that
# subset. theme_bw() is omitted because theme_economist_white() is a complete
# theme and replaced it anyway.
df %>%
  ggplot(aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region), shape = "o", size = 3) +
  geom_smooth(
    method = "lm", formula = y ~ log(x),
    se = FALSE, color = "red"
  ) +
  geom_text(
    aes(label = Country),
    color = "gray20",
    data = subset(df, Country %in% pointsToLabel),
    check_overlap = TRUE
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(.9, 10.5),
    breaks = 1:10
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0)
  ) +
  ggtitle("Corruption and Human development") +
  theme_economist_white()

Plotly

Use plotly to create interactive plots: https://plotly.com/r/

# Build a static scatter of mpg (x) against wt (y), then hand the finished
# ggplot object to ggplotly() to get the interactive version.
(ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point()) %>%
  ggplotly()